/*
 *  LightWeightLearningApi.java
 *
 *  Yaoyong Li 22/03/2007
 *
 *  $Id: LightWeightLearningApi.java, v 1.0 2007-03-22 12:58:16 +0000 yaoyong $
 */
package gate.learning;

import gate.AnnotationSet;
import gate.Annotation;
import gate.Corpus;
import gate.Factory;
import gate.FeatureMap;
import gate.Node;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.logging.Level;
import org.apache.log4j.Logger;
import gate.Document;
import gate.learning.learners.ChunkOrEntity;
import gate.learning.learners.MultiClassLearning;
import gate.learning.learners.PostProcessing;
import gate.learning.learners.SupervisedLearner;
import gate.learning.learners.SvmLibSVM;
import gate.learning.learners.weka.WekaLearner;
import gate.learning.learners.weka.WekaLearning;
import gate.util.Benchmark;
import gate.util.Benchmarkable;
import gate.util.BomStrippingInputStreamReader;
import gate.util.GateException;
import gate.util.InvalidOffsetException;
import gate.util.OffsetComparator;
import java.io.FileWriter;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.Random;
import org.supercsv.cellprocessor.ConvertNullTo;
import org.supercsv.cellprocessor.ift.CellProcessor;
import org.supercsv.io.ICsvListWriter;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;

/**
 * Do all the main learning tasks, such as obtaining the feature vectors from
 * GATE annotations, training and application. Also filters out some negative
 * examples if required.
 */
public class LightWeightLearningApi extends Object implements Benchmarkable {

    /** This is where the model(s) should be saved */
    private File wd;
    /**
     * The annotationSet containing annotations considered in the DATASET element
     * of the configuration file.
     */
    public String inputASName;
    /**
     * The annotationSet receiving the annotations produced by applying the models.
     */
    public String outputASName;
    /** Object of the NLP feature list. */
    public NLPFeaturesList featuresList;
    /** Object of the label list. */
    public Label2Id labelsAndId;
    /** Position of all features specified in the configuration file. */
    int[] featurePositionTotal;
    /** The left-most position among all features. */
    int maxNegPositionTotal = 0;
    /** The right-most position among all features. */
    int maxPosPositionTotal = 0;
    /** The weight for the Ngram features */
    float ngramWeight = 1.0f;
    /**
     * HashMap for the chunkLenStats, for post-processing of chunk learning.
     */
    HashMap chunkLenHash;

    List<String> headerList;
    List<CellProcessor> processorList;
    List predictionsList;

    /** Constructor, with working directory setting. */
    public LightWeightLearningApi(File wd) {
        this.wd = wd;
    }

    /**
     * Further initialisation for the main object LearningAPIMain().
     */
    public void furtherInit(File wdResults, LearningEngineSettings engineSettings) {
        int num;
        num = engineSettings.datasetDefinition.arrs.featurePosition.length;
        this.featurePositionTotal = new int[num];

        num = 0;
        for (int i = 0; i < engineSettings.datasetDefinition.arrs.featurePosition.length; ++i) {
            this.featurePositionTotal[num++] = engineSettings.datasetDefinition.arrs.featurePosition[i];
        }
        maxNegPositionTotal = 0;
        if (maxNegPositionTotal < engineSettings.datasetDefinition.arrs.maxNegPosition) {
            maxNegPositionTotal = engineSettings.datasetDefinition.arrs.maxNegPosition;
        }

        headerList = new ArrayList<String>();
        processorList = new ArrayList<CellProcessor>();
        predictionsList = new ArrayList();
    }

    /**
     * Obtain the features and labels and form feature vectors from the GATE
     * annotation of each document.
     */
    public void annotations2NLPFeatures(Document doc, int numDocs,
            BufferedWriter outNLPFeatures, boolean isTraining,
            LearningEngineSettings engineSettings) {
        AnnotationSet annotations = null;
        if (inputASName == null || inputASName.trim().length() == 0) {
            annotations = doc.getAnnotations();
        } else {
            annotations = doc.getAnnotations(inputASName);
        }
        // obtain the NLP features for the document
        String docName = doc.getName().replaceAll(ConstantParameters.ITEMSEPARATOR,
                "_");
        if (docName.contains("_")) {
            docName = docName.substring(0, docName.lastIndexOf("_"));
        }
        if (LogService.maxVerbosityLevel > 1) {
            System.out.println(numDocs + " docname=" + docName + ".");
        }
        NLPFeaturesOfDoc nlpFeaturesDoc = new NLPFeaturesOfDoc(annotations,
                engineSettings.datasetDefinition.getInstanceType(), docName);
        nlpFeaturesDoc.obtainDocNLPFeatures(annotations,
                engineSettings.datasetDefinition);
        nlpFeaturesDoc.writeNLPFeaturesToFile(outNLPFeatures, docName, numDocs,
                featurePositionTotal);
        return;
    }

    /**
     * Obtain the features and labels and form feature vectors from the GATE
     * annotation of each document.
     */
    public void annotations2NLPFeatures(Document doc, int numDocs,
            ICsvListWriter outNLPFeatures, boolean isTraining,
            LearningEngineSettings engineSettings) {
        AnnotationSet annotations = null;
        if (inputASName == null || inputASName.trim().length() == 0) {
            annotations = doc.getAnnotations();
        } else {
            annotations = doc.getAnnotations(inputASName);
        }
        // obtain the NLP features for the document
        String docName = doc.getName().replaceAll(ConstantParameters.ITEMSEPARATOR,
                "_");
        if (docName.contains("_")) {
            docName = docName.substring(0, docName.lastIndexOf("_"));
        }
        if (LogService.maxVerbosityLevel > 1) {
            System.out.println(numDocs + " docname=" + docName + ".");
        }
        NLPFeaturesOfDoc nlpFeaturesDoc = new NLPFeaturesOfDoc(annotations,
                engineSettings.datasetDefinition.getInstanceType(), docName);
        nlpFeaturesDoc.obtainDocNLPFeatures(annotations,
                engineSettings.datasetDefinition);
        nlpFeaturesDoc.writeNLPFeaturesToFile(outNLPFeatures, docName, numDocs,
                featurePositionTotal, engineSettings.datasetDefinition);
        return;
    }

    /**
     * Build WEKA instances from the GATE annotations of one document, classify
     * them with the given classifier model, and record the predictions.
     */
    public void annotations2Instances(Document doc, int positionDoc, 
            ICsvListWriter outPredictionsCsv, BufferedWriter arffSequenceFeatureFile, boolean isTraining,
            LearningEngineSettings engineSettings, Instances instancesHeader, Classifier classifierModel) {
        int numFalse = 0;
        int numTrue = 0;
        try {
            // Create input annotation list
            AnnotationSet inputAnnotations = null;
            if (inputASName == null || inputASName.trim().length() == 0) {
                inputAnnotations = doc.getAnnotations();
            } else {
                inputAnnotations = doc.getAnnotations(inputASName);
            }
            List<Annotation> inputAnnotationList = new ArrayList<Annotation>(inputAnnotations.get(engineSettings.datasetDefinition.instanceType));
            Collections.sort(inputAnnotationList, new OffsetComparator());

            // Create output annotation list
            AnnotationSet outputAnnotations = null;
            if (outputASName == null || outputASName.trim().length() == 0) {
                outputAnnotations = doc.getAnnotations();
            } else {
                outputAnnotations = doc.getAnnotations(outputASName);
            }
            List<Annotation> outputAnnotationList = new ArrayList<Annotation>(outputAnnotations.get(engineSettings.datasetDefinition.instanceType));
            Collections.sort(outputAnnotationList, new OffsetComparator());

            // Create class type annotation set
            AnnotationSet classTypeList = inputAnnotations.get(engineSettings.datasetDefinition.arrs.classType);

            // Create WEKA attributes
            FastVector attributes = new FastVector();
            for (int j = 0; j < engineSettings.datasetDefinition.arrs.numTypes; j++) {
                weka.core.Attribute tWekaAttribute = new weka.core.Attribute(engineSettings.datasetDefinition.arrs.namesInDataSetDef[j] + "[" + engineSettings.datasetDefinition.arrs.featurePosition[j] + "]", new FastVector());
                attributes.addElement(tWekaAttribute);
            }
            
            // Iterate over the instance annotations to build the class label list
            String[] classValues = new String[inputAnnotationList.size() + 1];
            int annId = -1;
            boolean isInEntityAnn = false;
            int maxSpan = 0;
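            // Walk the instance annotations in document order: the first token covered
            // by a class annotation gets a start-token suffix, the last token of a
            // multi-token annotation gets an end-token suffix, and tokens outside any
            // class annotation receive the out-of-class label. This keeps chunk
            // boundaries recoverable from the per-token labels.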
            for (int i = 0; i < inputAnnotationList.size(); i++) {
                Annotation instanceAnn = inputAnnotationList.get(i);
                List<Annotation> classCoveringList = new ArrayList(
                        classTypeList.getCovering(
                            null,
                            instanceAnn.getStartNode().getOffset(),
                            instanceAnn.getEndNode().getOffset()));
                if(classCoveringList.size() > 0){
                    classValues[i] = (String) classCoveringList.get(0).getFeatures().get(engineSettings.datasetDefinition.arrs.classFeature);
                    if(isInEntityAnn == true && classCoveringList.get(0).getId().intValue() != annId && maxSpan > 1){
                        classValues[i - 1] += ConstantParameters.SUFFIXENDTOKEN;
                    }
                    if(isInEntityAnn == false || classCoveringList.get(0).getId().intValue() != annId){
                        maxSpan = 1;
                        classValues[i] += ConstantParameters.SUFFIXSTARTTOKEN;
                    }else{
                        maxSpan++;
                    }
                    annId = classCoveringList.get(0).getId().intValue();
                    isInEntityAnn = true;
                }else{
                    if(isInEntityAnn == true && maxSpan > 1){
                        classValues[i - 1] += ConstantParameters.SUFFIXENDTOKEN;
                    }
                    classValues[i] = ConstantParameters.OUTOFCLASSNAME;
                    maxSpan = 0;
                    isInEntityAnn = false;
                }
            }

            // Debug output: print the instances header for the first document
            // (the actual writes below are currently commented out)
            OutputStreamWriter fstream =
                    new OutputStreamWriter(
                    new FileOutputStream(
                        new File("out.txt"),
                        true),
                    Charset.defaultCharset());
            BufferedWriter out = new BufferedWriter(fstream);
            if(positionDoc == 0){
                //out.write(instancesHeader + "\n");
                //arffSequenceFeatureFile.write(instancesHeader + "\n");
            }
            out.close();

                // Iterate over each instance annotation to create WEKA instances
            for (int i = 0; i < inputAnnotationList.size(); i++) {

                String str = "";
                Instance instance = new Instance(engineSettings.datasetDefinition.arrs.numTypes +
                        engineSettings.datasetDefinition.arrs.numPredictedClass + 1);
                instance.setDataset(instancesHeader);

                Annotation instanceAnn = inputAnnotationList.get(i);
                int numFeature = 0;

                // Class
                if(instancesHeader.attribute(numFeature).indexOfValue(classValues[i]) >= 0){
                    instance.setValue(numFeature, instancesHeader.attribute(numFeature).indexOfValue(classValues[i]));
                }else{
                    instance.setMissing(numFeature);
                }
                numFeature++;

                // Features
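                // For each configured feature, look at the instance annotation shifted by
                // the feature's position offset; values that fall outside the document or
                // that are unknown to the instances header are marked as missing.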
                for (int j = 0; j < engineSettings.datasetDefinition.arrs.numTypes; j++) {
                    if (i + engineSettings.datasetDefinition.arrs.featurePosition[j] >= 0 && i +
                            engineSettings.datasetDefinition.arrs.featurePosition[j] < inputAnnotationList.size()) {
                        Annotation tAnn = inputAnnotationList.get(i + engineSettings.datasetDefinition.arrs.featurePosition[j]);
                        if(tAnn.getFeatures().get(engineSettings.datasetDefinition.arrs.featuresInDataSetDef[j]) != null){
                            str = new String(tAnn.getFeatures().get(engineSettings.datasetDefinition.arrs.featuresInDataSetDef[j]).toString().getBytes(Charset.defaultCharset()), Charset.defaultCharset());
                        }else{
                            str = "null";
                        }
                        if(instancesHeader.attribute(numFeature).indexOfValue(str) >= 0){
                            instance.setValue(numFeature, instancesHeader.attribute(numFeature).indexOfValue(str));
                        }else{
                            instance.setMissing(numFeature);
                            fstream =
                                new OutputStreamWriter(
                                new FileOutputStream(
                                    new File("out.txt"),
                                    true),
                                Charset.defaultCharset());
                            //out = new BufferedWriter(fstream);
                            //out.write(instancesHeader.attribute(numFeature) + "<-->" + str + "\n");
                            //out.write(fstream.getEncoding() + "\n");
                            if(str == null){
                                str = "";
                            }
                            //out.write(instancesHeader.attribute(numFeature).indexOfValue(str) + "\n");
                            out.close();
                        }
                    } else {
                        str = null;
                        instance.setMissing(numFeature);
                    }
                    numFeature++;
                }

                // Predicted classes
                for(int j = 0; j < engineSettings.datasetDefinition.arrs.numPredictedClass; j++){
                    if (i + engineSettings.datasetDefinition.arrs.predictedClassPosition[j] >= 0 &&
                            i + engineSettings.datasetDefinition.arrs.predictedClassPosition[j] < inputAnnotationList.size()) {
                        Annotation tAnn = inputAnnotationList.get(i + engineSettings.datasetDefinition.arrs.predictedClassPosition[j]);

                        // Create predicted class type list
                        AnnotationSet predictedClassTypeList = outputAnnotations.get(engineSettings.datasetDefinition.arrs.classType);
                        List<Annotation> predictedClassCoveringList = new ArrayList(
                                predictedClassTypeList.getCovering(
                                    null,
                                    tAnn.getStartNode().getOffset(),
                                    tAnn.getEndNode().getOffset()));
                        String predictedClassValue = "";
                        if(predictedClassCoveringList.size() > 0){
                            predictedClassValue = (String) predictedClassCoveringList.get(0).getFeatures().get(engineSettings.datasetDefinition.arrs.classFeature);
                        }else{
                            predictedClassValue = ConstantParameters.OUTOFCLASSNAME;
                        }
                        if(instancesHeader.attribute(numFeature).indexOfValue(predictedClassValue) >= 0){
                            instance.setValue(numFeature, instancesHeader.attribute(numFeature).indexOfValue(predictedClassValue));
                        }else{
                            instance.setMissing(numFeature);
                        }
                    } else {
                        instance.setMissing(numFeature);
                    }
                    numFeature++;
                }

                // Classify instance
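                // classifyInstance returns the index of the predicted class value and
                // distributionForInstance returns the per-class probability estimates.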
                double resultClassLabel = classifierModel.classifyInstance(instance);
                double[] distributionArr = classifierModel.distributionForInstance(instance);

                String label = instancesHeader.attribute(0).value((int) resultClassLabel);
                FeatureMap classificationResultFeatures = Factory.newFeatureMap();
                classificationResultFeatures.put(engineSettings.datasetDefinition.arrs.classFeature,
                        classValues[i]);
                classificationResultFeatures.put("predicted_" + engineSettings.datasetDefinition.arrs.classFeature,
                        instancesHeader.attribute(0).value((int) resultClassLabel));
                classificationResultFeatures.put("probability", distributionArr[(int)resultClassLabel]);
                classificationResultFeatures.put("inst#", LearningAPIMain2.positionInstance);
                
                //classificationResultFeatures.put("instance", instance.toString());
                /*String distributionStr = "";
                for(int distIdx = 0; distIdx < distributionArr.length; distIdx++){
                    distributionStr = distributionStr + instancesHeader.attribute(0).value(distIdx) + String.format("(%.2f) ", distributionArr[distIdx]);
                }*/
                //classificationResultFeatures.put("distribution", distributionStr);
                boolean boolResult;
                if(classValues[i].equalsIgnoreCase(label)){
                    outputAnnotations.add(instanceAnn.getStartNode().getOffset(), instanceAnn.getEndNode().getOffset(),
                            "True", classificationResultFeatures);
                    numTrue++;
                    boolResult = true;
                }else{
                    outputAnnotations.add(instanceAnn.getStartNode().getOffset(), instanceAnn.getEndNode().getOffset(),
                            "False", classificationResultFeatures);
                    numFalse++;
                    boolResult = false;
                }

                // Write classification result to file (removed: it made processing slow)
                /*FeatureMap classificationLabelFeatures = Factory.newFeatureMap();
                classificationLabelFeatures.put(engineSettings.datasetDefinition.arrs.classFeature,
                        instancesHeader.attribute(0).value((int) resultClassLabel));
                outputAnnotations.add(instanceAnn.getStartNode().getOffset(), instanceAnn.getEndNode().getOffset(),
                        engineSettings.datasetDefinition.arrs.classType, classificationLabelFeatures);*/

                // Write classification result to CSV file
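                // The header row is written once, for the very first instance: instance
                // number, actual and predicted class, error flag, one distribution column
                // per class, then one column per feature and per predicted-class position.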
                if (LearningAPIMain2.positionInstance == 1) {
                    headerList.add("Inst#");
                    headerList.add("Actual Class");
                    headerList.add("Predicted Class");
                    headerList.add("Error");
                    for (int j = 0; j < distributionArr.length; j++) {
                        headerList.add(String.format("Dist(%s)", instancesHeader.attribute(0).value(j)));
                        processorList.add(new ConvertNullTo(""));
                    }
                    for(int j = 0; j < engineSettings.datasetDefinition.arrs.numTypes; j++){
                        headerList.add(String.format(
                                "%s(%d)",
                                engineSettings.datasetDefinition.arrs.namesInDataSetDef[j],
                                engineSettings.datasetDefinition.arrs.featurePosition[j]));
                        processorList.add(new ConvertNullTo(""));
                    }
                    for(int j = 0; j < engineSettings.datasetDefinition.arrs.numPredictedClass; j++){
                        headerList.add(String.format(
                                "%s(%d)",
                                engineSettings.datasetDefinition.arrs.namesInPredictedClasses[j],
                                engineSettings.datasetDefinition.arrs.predictedClassPosition[j]));
                        processorList.add(new ConvertNullTo(""));
                    }
                    outPredictionsCsv.write(headerList);
                }

                predictionsList.add(LearningAPIMain2.positionInstance);
                predictionsList.add(classValues[i]);
                predictionsList.add(instancesHeader.attribute(0).value((int) resultClassLabel));
                predictionsList.add(boolResult);
                for(int distIdx = 0; distIdx < distributionArr.length; distIdx++){
                    predictionsList.add(distributionArr[distIdx]);
                }

                processorList.add(0, new ConvertNullTo(""));
                for(int j = 0; j < engineSettings.datasetDefinition.arrs.numPredictedClass; j++){
                    processorList.add(new ConvertNullTo(""));
                }
                CellProcessor[] tProcessors = processorList.toArray(new CellProcessor[processorList.size()]);

                for (int j = 1; j < instance.numAttributes(); j++) {
                    predictionsList.add(instance.toString(j));
                }
                outPredictionsCsv.write(predictionsList);
                predictionsList.clear();

                instancesHeader.add(instance);

                arffSequenceFeatureFile.write(instance.toString() + "\n");

                LearningAPIMain2.positionInstance++;
                
            }

            //System.out.println("True=" + numTrue + "\n");
            //System.out.println("False=" + numFalse + "\n");

            return;
        } catch (IOException ex) {
            java.util.logging.Logger.getLogger(LightWeightLearningApi.class.getName()).log(Level.SEVERE, null, ex);
        } catch (Exception ex) {
            java.util.logging.Logger.getLogger(LightWeightLearningApi.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /** Normalise each feature vector to unit (L2) length. */
    static void normaliseFVs(DocFeatureVectors docFV) {
        for (int i = 0; i < docFV.numInstances; ++i) {
            double sum = 0;
            for (int j = 0; j < docFV.fvs[i].len; ++j) {
                sum += docFV.fvs[i].nodes[j].value * docFV.fvs[i].nodes[j].value;
            }
            sum = Math.sqrt(sum);
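            // divide each component by the Euclidean norm computed above (a zero-norm
            // vector would cause a division by zero here)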
            for (int j = 0; j < docFV.fvs[i].len; ++j) {
                docFV.fvs[i].nodes[j].value /= sum;
            }
        }
    }

    /**
     * Finish the conversion from annotations to feature vectors by writing the
     * label list and the NLP feature list back into files.
     */
    public void finishFVs(File wdResults, int numDocs, boolean isTraining,
            LearningEngineSettings engineSettings) {
        if (isTraining && engineSettings.isNLPFeatListUpdatable) {
            featuresList.writeListIntoFile(wdResults,
                    ConstantParameters.FILENAMEOFNLPFeatureList, "UTF-8");
        }
        if (isTraining && engineSettings.isLabelListUpdatable) {
            labelsAndId.writeLabelAndIdToFile(wdResults,
                    ConstantParameters.FILENAMEOFLabelList);
        }
        if (isTraining && engineSettings.surround) {
            ChunkLengthStats.writeChunkLensStatsIntoFile(wdResults,
                    ConstantParameters.FILENAMEOFChunkLenStats, chunkLenHash);
        }
    }

    /** Transfer the feature list to an n-gram language model. */
    public void featureList2LM(File wdResults, int nGram) {
        featuresList.writeToLM(wdResults, ConstantParameters.FILENAMEOFNgramLM,
                nGram);
    }

    /** Convert the NLP features into feature vectors and write them into file. */
    public void nlpfeatures2FVs(File wdResults, BufferedReader inNLPFeatures,
            BufferedWriter outFeatureVectors, int numDocs, boolean isTraining,
            LearningEngineSettings engineSettings) {
        try {
            // BufferedWriter outFeatureVectors = new BufferedWriter(new
            // OutputStreamWriter(new FileOutputStream(
            // new File(wdResults,ConstantParameters.FILENAMEOFFeatureVectorData),
            // true), "UTF-8"));
            // Read the first line out which is about feature names
            inNLPFeatures.readLine();
            for (int i = 0; i < numDocs; ++i) {
                NLPFeaturesOfDoc nlpFeatDoc = new NLPFeaturesOfDoc();
                nlpFeatDoc.readNLPFeaturesFromFile(inNLPFeatures);
                DocFeatureVectors docFV = new DocFeatureVectors();
                docFV.obtainFVsFromNLPFeatures(nlpFeatDoc, featuresList,
                        featurePositionTotal, maxNegPositionTotal, featuresList.totalNumDocs,
                        ngramWeight, engineSettings.datasetDefinition.valueTypeNgram);
                /*if(engineSettings.datasetDefinition.isSameWinSize) {
                // expand the feature vector to include the context words
                docFV.expandFV(engineSettings.datasetDefinition.windowSizeLeft,
                engineSettings.datasetDefinition.windowSizeRight);
                }*/
                if (isTraining) {
                    LabelsOfFeatureVectorDoc labelsDoc = new LabelsOfFeatureVectorDoc();
                    labelsDoc.obtainMultiLabelsFromNLPDocSurround(nlpFeatDoc,
                            labelsAndId, engineSettings.surround);
                    //addDocFVsMultiLabelToFile(i, outFeatureVectors,
                    //labelsDoc.multiLabels, docFV);
                    docFV.addDocFVsMultiLabelToFile(i, outFeatureVectors,
                            labelsDoc.multiLabels);
                } else {
                    int[] labels = new int[nlpFeatDoc.numInstances];
                    //addDocFVsToFile(i, outFeatureVectors, labels, docFV);
                    docFV.addDocFVsToFile(i, outFeatureVectors, labels);
                }
            }
            // outFeatureVectors.flush();
            // outFeatureVectors.close();
        } catch (IOException e) {
            System.out.println("Error occured in reading the NLP data from file for converting to FVs"
                    + "or writing the FVs data into file!");
        }
    }

    /** Write the FVs of one document into file. */
    /*void addDocFVsToFile(int index, BufferedWriter out, int[] labels,
    DocFeatureVectors docFV) {
    try {
    out.write(new Integer(index) + ConstantParameters.ITEMSEPARATOR
    + new Integer(docFV.numInstances) + ConstantParameters.ITEMSEPARATOR
    + docFV.docId);
    out.newLine();
    for(int i = 0; i < docFV.numInstances; ++i) {
    StringBuffer line = new StringBuffer();
    line.append(new Integer(i + 1) + ConstantParameters.ITEMSEPARATOR
    + new Integer(labels[i]));
    for(int j = 0; j < docFV.fvs[i].len; ++j)
    line.append(ConstantParameters.ITEMSEPARATOR
    + docFV.fvs[i].nodes[j].index + ConstantParameters.INDEXVALUESEPARATOR
    + docFV.fvs[i].nodes[j].value);
    out.write(line.toString());
    out.newLine();
    }
    } catch(IOException e) {
    }
    }*/
    /** Write the FVs with labels of one document into file. */
    /*void addDocFVsMultiLabelToFile(int index, BufferedWriter out,
    LabelsOfFV[] multiLabels, DocFeatureVectors docFV) {
    try {
    out.write(new Integer(index) + ConstantParameters.ITEMSEPARATOR
    + new Integer(docFV.numInstances) + ConstantParameters.ITEMSEPARATOR
    + docFV.docId);
    out.newLine();
    for(int i = 0; i < docFV.numInstances; ++i) {
    StringBuffer line = new StringBuffer();
    line.append(new Integer(i + 1) + ConstantParameters.ITEMSEPARATOR
    + multiLabels[i].num);
    for(int j = 0; j < multiLabels[i].num; ++j)
    line.append(ConstantParameters.ITEMSEPARATOR
    + multiLabels[i].labels[j]);
    for(int j = 0; j < docFV.fvs[i].len; ++j)
    line.append(ConstantParameters.ITEMSEPARATOR
    + docFV.fvs[i].nodes[j].index + ConstantParameters.INDEXVALUESEPARATOR
    + docFV.fvs[i].nodes[j].value);
    out.write(line.toString());
    out.newLine();
    }
    } catch(IOException e) {
    }
    }*/
    /** Order and select unlabelled documents for active learning. */
    public void orderDocsWithModels(File wdResults,
            LearningEngineSettings engineSettings) {
        try {
            // Reading the names of all documents and the total number of them
            int numDocs = 0;
            // Read the names of documents from a file
            BufferedReader inDocsName = new BomStrippingInputStreamReader(
                    new FileInputStream(new File(wdResults,
                    ConstantParameters.FILENAMEOFDocsName)), "UTF-8");
            String str = inDocsName.readLine();
            numDocs = Integer.parseInt(str.substring(str.indexOf("=") + 1));
            String[] docsName = new String[numDocs];
            for (int i = 0; i < numDocs; ++i) {
                docsName[i] = inDocsName.readLine();
            }
            inDocsName.close();
            // Read the selected document
            int numDocsSelected;
            Vector selectedDocs = new Vector();
            File docsFile = new File(wdResults,
                    ConstantParameters.FILENAMEOFSelectedDOCForAL);
            if (docsFile.exists()) {
                numDocsSelected = obtainDocsForAL(docsFile, selectedDocs);
            } else {
                System.out.println("!!! Warning: Cannot get the information about the "
                        + "number of documents for selecting!!");
                return;
            }
            boolean[] countedDocs = new boolean[numDocs];
            int[] indexSortedDocs = new int[numDocs];
            // first apply the current models to the data
            File dataFile = new File(wdResults,
                    ConstantParameters.FILENAMEOFFVDataSelecting);
            if (!dataFile.exists()) {
                System.out.println("!!! Warning: The data file named "
                        + ConstantParameters.FILENAMEOFFVDataSelecting
                        + " doesn't exist. The file should be used to store the fv data for selecting doc!");
                return;
            }
            File modelFile = new File(wdResults, ConstantParameters.FILENAME_OF_MODEL);
            int learnerType;
            learnerType = obtainLearnerType(engineSettings.learnerSettings.implementationName);
            int numClasses = 0;
            switch (learnerType) {
                case 1: // for weka learner
                    LogService.logMessage("Use weka learner.", 1);
                    System.out.println("!! Warning: Currently there is no implementation for the Weka's learner "
                            + "to select document for active learning.");
                    LogService.logMessage(
                            "!! Warning: Currently there is no implementation for the Weka's learner "
                            + "to select document for active learning.", 1);
                    break;
                case 2: // for learner of multi to binary conversion
                    LogService.logMessage("Multi to binary conversion.", 1);
                    // System.out.println("** multi to binary:");
                    String dataSetFile = null;
                    //get a learner
                    String learningCommand = engineSettings.learnerSettings.options;
                    learningCommand = learningCommand.trim();
                    learningCommand = learningCommand.replaceAll("[ \t]+", " ");
                    SupervisedLearner paumLearner = MultiClassLearning.obtainLearnerFromName(engineSettings.learnerSettings.implementationName,
                            learningCommand, dataSetFile);
                    paumLearner.setLearnerExecutable(engineSettings.learnerSettings.executableTraining);
                    paumLearner.setLearnerParams(engineSettings.learnerSettings.options);
                    LogService.logMessage(
                            "The learners: " + paumLearner.getLearnerName(), 1);
                    MultiClassLearning chunkLearning = new MultiClassLearning(
                            engineSettings.multi2BinaryMode);
                    // read data
                    File tempDataFile = new File(wdResults,
                            ConstantParameters.TempFILENAMEofFVData);
                    boolean isUsingTempDataFile = false;
                    //if(paumLearner.getLearnerName().equals("SVMExec"))
                    //isUsingTempDataFile = true;
                    chunkLearning.getDataFromFile(numDocs, dataFile, isUsingTempDataFile, tempDataFile);
                    // Apply the model to the data which was read already.
                    chunkLearning.apply(paumLearner, modelFile);
                    // labelsFVDoc = chunkLearning.dataFVinDoc.labelsFVDoc;
                    numClasses = chunkLearning.numClasses;
                    if (engineSettings.multi2BinaryMode == 2) // for one vs another mode
                    {
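                        // In one-vs-another (pairwise) mode there is one binary classifier per
                        // pair of classes: n*(n-1)/2 of them, or n*(n+1)/2 when the null class
                        // is counted as an extra class.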
                        if (chunkLearning.numNull > 0) {
                            numClasses = (numClasses + 1) * numClasses / 2;
                        } else {
                            numClasses = (numClasses - 1) * numClasses / 2;
                        }
                    }
                    // Compute the margins for the instances in the document
                    // first set which documents were selected, which were not
                    for (int i = 0; i < numDocs; ++i) {
                        if (selectedDocs.contains(docsName[i])) {
                            countedDocs[i] = false;
                        } else {
                            countedDocs[i] = true;
                        }
                    }
                    // then compute the margins for documents
                    float[] marginsD = new float[numDocs];
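                    // optB is a bias threshold derived from the learner's tau parameter;
                    // computeDocBasedMargins subtracts it from the margins when
                    // multi2BinaryMode == 1 (the one-vs-others mode).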
                    float optB = (1 - ((SvmLibSVM) paumLearner).tau)
                            / (1 + ((SvmLibSVM) paumLearner).tau);
                    computeDocBasedMargins(chunkLearning.dataFVinDoc.labelsFVDoc,
                            numClasses, engineSettings.multi2BinaryMode, optB,
                            engineSettings.alSetting, marginsD);
                    // rank the documents by margin; assign a large sentinel value to the
                    // documents already selected so that they are skipped below
                    for (int i = 0; i < numDocs; ++i) {
                        if (!countedDocs[i]) {
                            marginsD[i] = 99999;
                        }
                    }
                    LightWeightLearningApi.sortFloatAscIndex(marginsD, indexSortedDocs,
                            numDocs, numDocs);
                    // write the ranked documents into a file
                    BufferedWriter outDocs = new BufferedWriter(new OutputStreamWriter(
                            new FileOutputStream(new File(wdResults,
                            ConstantParameters.FILENAMEOFRankedDOCForAL)), "UTF-8"));
                    // String [] str =
                    // inDocs.readLine().split(ConstantParameters.ITEMSEPARATOR);
                    int numRanked = numDocs - numDocsSelected;
                    outDocs.append("##numDocsRanked=" + numRanked);
                    outDocs.newLine();
                    for (int i = 0; i < numDocs; ++i) {
                        int kk = numDocs - i - 1;
                        if (countedDocs[indexSortedDocs[kk]]) {
                            outDocs.append(docsName[indexSortedDocs[kk]]
                                    + ConstantParameters.ITEMSEPARATOR
                                    + marginsD[indexSortedDocs[kk]]);
                            outDocs.newLine();
                        }
                    }
                    outDocs.flush();
                    outDocs.close();
                    break;
                default:
                    System.out.println("Error! Wrong learner type.");
                    LogService.logMessage("Error! Wrong learner type.", 0);
            }
        } catch (GateException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Obtain the documents already selected for active learning and return their number.
     *
     * @throws IOException
     */
    private int obtainDocsForAL(File docsFile, List<String> selectedDocs)
            throws IOException {
        int numDocsSelected = 0;
        BufferedReader inDocs = new BomStrippingInputStreamReader(
                new FileInputStream(docsFile), "UTF-8");
        String str = inDocs.readLine();
        while (str != null && str != "") {
            selectedDocs.add(str);
            str = inDocs.readLine();
            ++numDocsSelected;
        }
        inDocs.close();
        return numDocsSelected;
    }

    /**
     * Compute the margin of each token to the class hyperplane, averaged over the
     * tokens in each document.
     */
    private void computeDocBasedMargins(LabelsOfFeatureVectorDoc[] labelsFVDoc,
            int numClasses, int multi2BinaryMode, float optB,
            ActiveLearningSetting als, float[] marginsD) {
        int numDoc = labelsFVDoc.length;
        for (int nd = 0; nd < numDoc; ++nd) {
            int numInstances = labelsFVDoc[nd].multiLabels.length;
            float[] valueInst = new float[numInstances];
            for (int i = 0; i < numClasses; ++i) {
                for (int ni = 0; ni < numInstances; ++ni) {
                    valueInst[ni] = (float) UsefulFunctions.inversesigmoid((double) labelsFVDoc[nd].multiLabels[ni].probs[i]);
                    if (multi2BinaryMode == 1) {
                        valueInst[ni] -= optB;
                    }
                    if (valueInst[ni] > 0) {
                        // negate positive values so that the ascending sort below can be used
                        valueInst[ni] = -valueInst[ni];
                    }
                }// end of loop over all instances
                // select the biggest
                if (als.numTokensSelect > 0) {
                    int numExamples = als.numTokensSelect;
                    if (numExamples > numInstances) {
                        numExamples = numInstances;
                    }
                    int[] indexSort = new int[numExamples];
                    LightWeightLearningApi.sortFloatAscIndex(valueInst, indexSort,
                            numInstances, numExamples);
                    float sum = 0;
                    for (int j = 0; j < numExamples; ++j) {
                        sum += valueInst[indexSort[j]]; // still use the negative values
                    }
                    sum /= numExamples;
                    marginsD[nd] -= sum; // because values are negative.
                } else {
                    float sum = 0;
                    int k = 0;
                    for (int j = 0; j < numInstances; ++j) {
                        if (valueInst[j] > -0.5) {
                            sum += valueInst[j];
                            ++k;
                        }
                    }
                    if (k > 0) {
                        sum /= k;
                    } else {
                        sum = -1.0f;
                    }
                    marginsD[nd] -= sum; // because values are negative.
                }
            }// end of loop over all classes
        }// end of loop over all documents
    }

    /**
     * Obtain the term-frequency matrix for each document from the feature vector
     * file.
     */
    public void termfrequenceMatrix(File wdResults, int numDocs) {
        try {
            BufferedWriter outTFs = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(new File(wdResults,
                    ConstantParameters.FILENAMEOFTermFreqMatrix), true), "UTF-8"));
            BufferedReader inFVs = new BomStrippingInputStreamReader(
                    new FileInputStream(new File(wdResults,
                    ConstantParameters.FILENAMEOFFeatureVectorData)), "UTF-8");
            HashMap<Integer, String> indexTerm = new HashMap<Integer, String>();
            for (Object obj : featuresList.featuresList.keySet()) {
                indexTerm.put(
                        new Integer(featuresList.featuresList.get(obj).toString()), obj.toString());
            }
            for (int nd = 0; nd < numDocs; ++nd) {
                String[] ts = inFVs.readLine().split(ConstantParameters.ITEMSEPARATOR);
                outTFs.append(ts[0] + " Documentname=\"" + ts[2] + "\", has " + ts[1]
                        + " parts:");
                outTFs.newLine();
                int npart = Integer.parseInt(ts[1]);
                for (int i = 0; i < npart; ++i) {
                    String[] ts1 = inFVs.readLine().split(
                            ConstantParameters.ITEMSEPARATOR);
                    HashMap<String, Integer> termFreq = new HashMap<String, Integer>();
                    int bindex = 2 + Integer.parseInt(ts1[1]);
                    for (int j = bindex; j < ts1.length; ++j) {
                        int isep = ts1[j].indexOf(ConstantParameters.INDEXVALUESEPARATOR);
                        Integer index = new Integer(ts1[j].substring(0, isep));
                        Integer valI = new Integer((int) (Float.parseFloat((ts1[j].substring(isep + 1)))));
                        termFreq.put(indexTerm.get(index), valI);
                    }
                    List<String> keys = new ArrayList<String>(termFreq.keySet());
                    Collections.sort(keys);
                    StringBuffer sb = new StringBuffer();
                    for (int j = 0; j < keys.size(); ++j) {
                        String ks = keys.get(j);
                        String str = ks;
                        if (str.contains("<>")) { // if it is a ngram feature
                            str = str.substring(str.indexOf("_", 1) + 1, str.lastIndexOf("<>"));
                            sb.append(str + ConstantParameters.INDEXVALUESEPARATOR
                                    + termFreq.get(ks) + " ");
                        }
                    }
                    outTFs.append(sb.toString().trim());
                    outTFs.newLine();
                }
            }
            inFVs.close();
            outTFs.flush();
            outTFs.close();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Copy the NLP features from the temporary file to the normal file. */
    public void copyNLPFeat2NormalFile(File wdResults, int miNumDocsTraining) {
        try {
            BufferedWriter outNLPFeatures = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(new File(wdResults,
                    ConstantParameters.FILENAME_FEATURES_CSV), true), "UTF-8"));
            BufferedReader inNLPFeaturesTemp = new BomStrippingInputStreamReader(new FileInputStream(new File(wdResults,
                    ConstantParameters.FILENAMEOFNLPFeaturesDataTemp)), "UTF-8");
            String line = inNLPFeaturesTemp.readLine();
            if (miNumDocsTraining == 0) {
                outNLPFeatures.append(line);
                outNLPFeatures.newLine();
            }
            line = inNLPFeaturesTemp.readLine();
            while (line != null) {
                outNLPFeatures.append(line);
                outNLPFeatures.newLine();
                line = inNLPFeaturesTemp.readLine();
            }
            inNLPFeaturesTemp.close();
            outNLPFeatures.flush();
            outNLPFeatures.close();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Training using the Java implementation of learning algorithms.
     */
    public void trainingJava(int numDocs, LearningEngineSettings engineSettings)
            throws GateException {
        LogService.logMessage("\nTraining starts.\n", 1);
        // The files for training data and model
        File wdResults = new File(wd, ConstantParameters.SUBDIRFORRESULTS);
        String nlpFileName = wdResults.toString() + File.separator
                + ConstantParameters.FILENAME_FEATURES_CSV;
        String modelFileName = wdResults.toString() + File.separator
                + ConstantParameters.FILENAME_OF_MODEL;
        
        File nlpDataFile = new File(nlpFileName);
        File modelFile = new File(modelFileName);
        String learnerType = engineSettings.learnerSettings.learnerType;
        
        if(learnerType.equalsIgnoreCase("weka")) {
            Classifier model;
            try {
                LogService.logMessage("Use weka learner.", 1);
                model = (Classifier) Class.forName(engineSettings.learnerSettings.implementationName).newInstance();
                model.setOptions(Utils.splitOptions(engineSettings.learnerSettings.options));
                //model.buildClassifier(trainData);
            } catch (InstantiationException ex) {
                java.util.logging.Logger.getLogger(LightWeightLearningApi.class.getName()).log(Level.SEVERE, null, ex);
            } catch (IllegalAccessException ex) {
                java.util.logging.Logger.getLogger(LightWeightLearningApi.class.getName()).log(Level.SEVERE, null, ex);
            } catch (ClassNotFoundException ex) {
                java.util.logging.Logger.getLogger(LightWeightLearningApi.class.getName()).log(Level.SEVERE, null, ex);
            } catch (Exception ex) {
                java.util.logging.Logger.getLogger(LightWeightLearningApi.class.getName()).log(Level.SEVERE, null, ex);
            }
        }else{
            System.out.println("Error! Wrong learner type.");
            LogService.logMessage("Error! Wrong learner type.", 0);
        }
    }

    /**
     * Training using a Weka classifier: build the model on the given instances,
     * save the model object and its textual output, and report 10-fold
     * cross-validation results.
     */
    public void trainingWeka(Instances trainData, LearningEngineSettings engineSettings)
            throws GateException {
        LogService.logMessage("\nTraining starts.\n", 1);
        // The files for training data and model
        File wdResults = new File(wd, ConstantParameters.SUBDIRFORRESULTS);
        String modelFileName = wdResults.toString() + File.separator
                + ConstantParameters.FILENAME_OF_MODEL;
        String modelOutputFileName = wdResults.toString() + File.separator
                + ConstantParameters.FILENAME_OF_MODEL_OUTPUT;

        Classifier model;
        try {
            LogService.logMessage("Use weka learner.", 1);

            // build classifier model
            model = (Classifier) Class.forName(engineSettings.learnerSettings.implementationName).newInstance();
            model.setOptions(Utils.splitOptions(engineSettings.learnerSettings.options));
            model.buildClassifier(trainData);

            // save model to file
            OutputStream os = new FileOutputStream(modelFileName);
            ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
            objectOutputStream.writeObject(model);
            objectOutputStream.close();
            System.out.println("Save model object to " + modelFileName);

            // save model output to text file
            BufferedWriter modelOutputWriter = new BufferedWriter(
                new OutputStreamWriter(
                new FileOutputStream(
                new File(
                modelOutputFileName)),
                "UTF-8"));
            modelOutputWriter.write(model.toString());
            modelOutputWriter.close();
            System.out.println("Save model output to text file " + modelOutputFileName);

            // Evaluate
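            // The saved model above is trained on the full training set; the 10-fold
            // cross-validation here (fixed random seed for reproducibility) only produces
            // a performance estimate for reporting.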
            Evaluation eval = new Evaluation(trainData);
            eval.crossValidateModel(model, trainData, 10, new Random(1));
            System.out.println(eval.toSummaryString("\nResults 1\n======\n", false));

        } catch (InstantiationException ex) {
            java.util.logging.Logger.getLogger(LightWeightLearningApi.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IllegalAccessException ex) {
            java.util.logging.Logger.getLogger(LightWeightLearningApi.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ClassNotFoundException ex) {
            java.util.logging.Logger.getLogger(LightWeightLearningApi.class.getName()).log(Level.SEVERE, null, ex);
        } catch (Exception ex) {
            java.util.logging.Logger.getLogger(LightWeightLearningApi.class.getName()).log(Level.SEVERE, null, ex);
        }
        
    }

    /**
     * Apply the model to data, also using the learning algorithm implemented in
     * Java.
     */
    public void applyModelInJava(Corpus corpus, int startDocId, int endDocId,
            String labelName, LearningEngineSettings engineSettings, File tempFilesDir)
            throws GateException {
        int numDocs = endDocId - startDocId;
        LogService.logMessage("\nApplication starts.", 1);
        // The files for training data and model
        File wdResults = new File(wd, ConstantParameters.SUBDIRFORRESULTS);
        String fvFileName = tempFilesDir.toString() + File.separator
                + ConstantParameters.FILENAMEOFFeatureVectorDataApp;
        String nlpFileName = tempFilesDir.toString() + File.separator
                + ConstantParameters.FILENAME_FEATURES_CSV;
        String modelFileName = wdResults.toString() + File.separator
                + ConstantParameters.FILENAME_OF_MODEL;
        // String labelInDataFileName = wdResults.toString() + File.separator
        // + ConstantParameters.FILENAMEOFLabelsInData;
        File dataFile = new File(fvFileName);
        File nlpDataFile = new File(nlpFileName);
        File modelFile = new File(modelFileName);
        int learnerType;
        learnerType = obtainLearnerType(engineSettings.learnerSettings.implementationName);
        int numClasses = 0;
        // Store the label information from the model application
        LabelsOfFeatureVectorDoc[] labelsFVDoc = null;
        short featureType = WekaLearning.SPARSEFVDATA;
        Map benchmarkingFeatures = new HashMap();
        switch (learnerType) {
            case 1: // for weka learner
                LogService.logMessage("Use weka learner.", 1);
                WekaLearning wekaL = new WekaLearning();
                // Check if the learner uses the sparse feature vectors or the NLP
                // features
                featureType = WekaLearning.obtainWekaLeanerDataType(engineSettings.learnerSettings.implementationName);
                long startTime = Benchmark.startPoint();
                switch (featureType) {
                    case WekaLearning.NLPFEATUREFVDATA:
                        wekaL.readNLPFeaturesFromFile(nlpDataFile, numDocs,
                                this.featuresList, false, labelsAndId.label2Id.size(),
                                engineSettings.surround);
                        break;
                    case WekaLearning.SPARSEFVDATA:
                        wekaL.readSparseFVsFromFile(dataFile, numDocs, false,
                                labelsAndId.label2Id.size(), engineSettings.surround);
                        break;
                }
                Benchmark.checkPoint(startTime, benchmarkID + "."
                        + "readingNlpFeatures", this, benchmarkingFeatures);
                // Check if the weka learner produces a distribution output for classification
                boolean distributionOutput = WekaLearning.obtainWekaLearnerOutputType(engineSettings.learnerSettings.implementationName);
                // Get the wekaLearner from the learnername
                WekaLearner wekaLearner = WekaLearning.obtainWekaLearner(
                        engineSettings.learnerSettings.implementationName,
                        engineSettings.learnerSettings.options);
                LogService.logMessage("Weka learner name: "
                        + wekaLearner.getLearnerName(), 1);
                // Application
                startTime = Benchmark.startPoint();
                benchmarkingFeatures.put("modelFile", modelFile.getAbsolutePath());
                wekaL.apply(wekaLearner, modelFile, distributionOutput);
                Benchmark.checkPoint(startTime, benchmarkID + "."
                        + "wekaModelApplication", this, benchmarkingFeatures);
                benchmarkingFeatures.remove("modelFile");
                labelsFVDoc = wekaL.labelsFVDoc;
                numClasses = labelsAndId.label2Id.size() * 2; // subtract the null class
                break;
            case 2: // for learner of multi to binary conversion
                LogService.logMessage("Multi to binary conversion.", 1);
                // System.out.println("** multi to binary:");
                //      get a learner
                String learningCommand = engineSettings.learnerSettings.options;
                learningCommand = learningCommand.trim();
                learningCommand = learningCommand.replaceAll("[ \t]+", " ");
                String dataSetFile = null;
                SupervisedLearner paumLearner = MultiClassLearning.obtainLearnerFromName(engineSettings.learnerSettings.implementationName,
                        learningCommand, dataSetFile);
                paumLearner.setLearnerExecutable(engineSettings.learnerSettings.executableTraining);
                paumLearner.setLearnerParams(engineSettings.learnerSettings.options);
                LogService.logMessage("The learners: " + paumLearner.getLearnerName(),
                        1);
                MultiClassLearning chunkLearning = new MultiClassLearning(
                        engineSettings.multi2BinaryMode);
                // read data
                startTime = Benchmark.startPoint();
                //get the fv data
                File tempDataFile = new File(wdResults,
                        ConstantParameters.TempFILENAMEofFVData);
                boolean isUsingTempDataFile = false;
                //if(paumLearner.getLearnerName().equals("SVMExec"))
                //isUsingTempDataFile = true;
                chunkLearning.getDataFromFile(numDocs, dataFile, isUsingTempDataFile, tempDataFile);
                Benchmark.checkPoint(startTime, benchmarkID + "."
                        + "readingChunkLearningData", this, benchmarkingFeatures);
                // apply
                startTime = Benchmark.startPoint();
                benchmarkingFeatures.put("modelFile", modelFile.getAbsolutePath());
                // use a different method depending on whether multiple threads are used
                if (engineSettings.numThreadUsed > 1) //for using thread
                {
                    chunkLearning.apply(paumLearner, modelFile);
                } else //for not using thread
                {
                    chunkLearning.applyNoThread(paumLearner, modelFile);
                }

                Benchmark.checkPoint(startTime, benchmarkID + "."
                        + "paumModelApplication", this, benchmarkingFeatures);
                benchmarkingFeatures.remove("modelFile");
                labelsFVDoc = chunkLearning.dataFVinDoc.labelsFVDoc;
                numClasses = chunkLearning.numClasses;
                break;
            default:
                System.out.println("Error! Wrong learner type.");
                LogService.logMessage("Error! Wrong learner type.", 0);
        }
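        // Post-processing: in surround (chunk) mode the predicted boundary labels are
        // combined into chunk annotations; otherwise each instance simply receives its
        // selected classification label(s).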
        if (engineSettings.surround) {
            String featName = engineSettings.datasetDefinition.arrs.classFeature;
            String instanceType = engineSettings.datasetDefinition.getInstanceType();
            labelsAndId = new Label2Id();
            labelsAndId.loadLabelAndIdFromFile(wdResults,
                    ConstantParameters.FILENAMEOFLabelList);
            // post-processing and add new annotation to the text
            PostProcessing postPr = new PostProcessing(
                    engineSettings.thrBoundaryProb, engineSettings.thrEntityProb,
                    engineSettings.thrClassificationProb);
            // System.out.println("** Application mode:");
            long startTime = Benchmark.startPoint();
            for (int i = 0; i < numDocs; ++i) {
                HashSet chunks = new HashSet();
                postPr.postProcessingChunk((short) 3, labelsFVDoc[i].multiLabels,
                        numClasses, chunks, chunkLenHash);
                // System.out.println("**
                // documentName="+((Document)corpus.get(i)).getName());
                Document toProcess = (Document) corpus.get(i + startDocId);
                addAnnsInDoc(toProcess, chunks, instanceType, featName, labelName,
                        labelsAndId);
                if (toProcess.getDataStore() != null && corpus.getDataStore() != null) {
                    corpus.getDataStore().sync(corpus);
                    //Factory.deleteResource(toProcess);
                }
            }
            Benchmark.checkPoint(startTime, benchmarkID + "." + "postProcessing",
                    this, benchmarkingFeatures);
        } else {
            String featName = engineSettings.datasetDefinition.arrs.classFeature;
            String instanceType = engineSettings.datasetDefinition.getInstanceType();
            labelsAndId = new Label2Id();
            labelsAndId.loadLabelAndIdFromFile(wdResults,
                    ConstantParameters.FILENAMEOFLabelList);
            // post-processing and add new annotation to the text
            // PostProcessing postPr = new PostProcessing(0.42, 0.2);
            PostProcessing postPr = new PostProcessing(
                    engineSettings.thrBoundaryProb, engineSettings.thrEntityProb,
                    engineSettings.thrClassificationProb);
            for (int i = 0; i < numDocs; ++i) {
                int[] selectedLabels = new int[labelsFVDoc[i].multiLabels.length];
                float[] valuesLabels = new float[labelsFVDoc[i].multiLabels.length];
                postPr.postProcessingClassification((short) 3,
                        labelsFVDoc[i].multiLabels, selectedLabels, valuesLabels);
                Document toProcess = (Document) corpus.get(i + startDocId);
                // Add the ranked label list and their scores, not just a single label
                // addLabelListInDocClassification(toProcess,
                // labelsFVDoc[i].multiLabels,
                // instanceType, featName, labelName, labelsAndId, engineSettings);
                addAnnsInDocClassification(toProcess, selectedLabels, valuesLabels,
                        instanceType, featName, labelName, labelsAndId, engineSettings);
                if (toProcess.getDataStore() != null && corpus.getDataStore() != null) {
                    corpus.getDataStore().sync(corpus);
                    //Factory.deleteResource(toProcess);
                }
            }
        }
    }

    /**
     * Apply the model to a single document, likewise using the learning
     * algorithm implemented in Java.
     */
    public void applyModelInJavaPerDoc(Document doc, String labelName,
            LearningEngineSettings engineSettings, File tempFilesDir) throws GateException {
        LogService.logMessage("\nApplication starts.", 1);
        // The files for training data and model
        File wdResults = new File(wd, ConstantParameters.SUBDIRFORRESULTS);
        String fvFileName = tempFilesDir.toString() + File.separator
                + ConstantParameters.FILENAMEOFFeatureVectorData;
        String nlpFileName = tempFilesDir.toString() + File.separator
                + ConstantParameters.FILENAME_FEATURES_CSV;
        String modelFileName = wdResults.toString() + File.separator
                + ConstantParameters.FILENAME_OF_MODEL;
        // String labelInDataFileName = wdResults.toString() + File.separator
        // + ConstantParameters.FILENAMEOFLabelsInData;
        File dataFile = new File(fvFileName);
        File nlpDataFile = new File(nlpFileName);
        File modelFile = new File(modelFileName);
        int learnerType;
        learnerType = obtainLearnerType(engineSettings.learnerSettings.implementationName);
        int numClasses = 0;
        int numDocs = 1; // corpus.size();
        // Store the label information from the model application
        LabelsOfFeatureVectorDoc[] labelsFVDoc = null;
        short featureType = WekaLearning.SPARSEFVDATA;
        switch (learnerType) {
            case 1: // for weka learner
                LogService.logMessage("Use weka learner.", 1);
                WekaLearning wekaL = new WekaLearning();
                // Check whether the learner uses the sparse feature vectors or the NLP
                // features
                featureType = WekaLearning.obtainWekaLeanerDataType(engineSettings.learnerSettings.implementationName);
                switch (featureType) {
                    case WekaLearning.NLPFEATUREFVDATA:
                        wekaL.readNLPFeaturesFromFile(nlpDataFile, numDocs,
                                this.featuresList, false, labelsAndId.label2Id.size(),
                                engineSettings.surround);
                        break;
                    case WekaLearning.SPARSEFVDATA:
                        wekaL.readSparseFVsFromFile(dataFile, numDocs, false,
                                labelsAndId.label2Id.size(), engineSettings.surround);
                        break;
                }
                // Check whether the Weka learner outputs a class distribution
                boolean distributionOutput = WekaLearning.obtainWekaLearnerOutputType(engineSettings.learnerSettings.implementationName);
                // Get the Weka learner from the learner name
                WekaLearner wekaLearner = WekaLearning.obtainWekaLearner(
                        engineSettings.learnerSettings.implementationName,
                        engineSettings.learnerSettings.options);
                LogService.logMessage("Weka learner name: "
                        + wekaLearner.getLearnerName(), 1);
                // Apply the model.
                wekaL.apply(wekaLearner, modelFile, distributionOutput);
                labelsFVDoc = wekaL.labelsFVDoc;
                // two boundary classes (start/end) per label; the null class is not counted
                numClasses = labelsAndId.label2Id.size() * 2;
                break;
            case 2: // for learner of multi to binary conversion
                LogService.logMessage("Multi to binary conversion.", 1);
                // get a learner
                String learningCommand = engineSettings.learnerSettings.options;
                learningCommand = learningCommand.trim();
                learningCommand = learningCommand.replaceAll("[ \t]+", " ");
                String dataSetFile = null;
                SupervisedLearner paumLearner = MultiClassLearning.obtainLearnerFromName(engineSettings.learnerSettings.implementationName,
                        learningCommand, dataSetFile);
                paumLearner.setLearnerExecutable(engineSettings.learnerSettings.executableTraining);
                paumLearner.setLearnerParams(engineSettings.learnerSettings.options);
                LogService.logMessage("The learners: " + paumLearner.getLearnerName(),
                        1);
                // System.out.println("** multi to binary:");
                MultiClassLearning chunkLearning = new MultiClassLearning(
                        engineSettings.multi2BinaryMode);
                // read data
                //      get the fv data
                File tempDataFile = new File(tempFilesDir,
                        ConstantParameters.TempFILENAMEofFVData);
                boolean isUsingTempDataFile = false;
                //if(paumLearner.getLearnerName().equals("SVMExec"))
                //isUsingTempDataFile = true;
                chunkLearning.getDataFromFile(numDocs, dataFile, isUsingTempDataFile, tempDataFile);
                // dataFile);

                // apply
                // use different methods for single-thread or multi-thread execution
                if (engineSettings.numThreadUsed > 1) //for using thread
                {
                    chunkLearning.apply(paumLearner, modelFile);
                } else //for not using thread
                {
                    chunkLearning.applyNoThread(paumLearner, modelFile);
                }
                labelsFVDoc = chunkLearning.dataFVinDoc.labelsFVDoc;
                numClasses = chunkLearning.numClasses;
                break;
            default:
                System.out.println("Error! Wrong learner type.");
                LogService.logMessage("Error! Wrong learner type.", 0);
        }
        if (engineSettings.surround) {
            String featName = engineSettings.datasetDefinition.arrs.classFeature;
            String instanceType = engineSettings.datasetDefinition.getInstanceType();
            labelsAndId = new Label2Id();
            labelsAndId.loadLabelAndIdFromFile(wdResults,
                    ConstantParameters.FILENAMEOFLabelList);
            // post-processing and add new annotation to the text
            PostProcessing postPr = new PostProcessing(
                    engineSettings.thrBoundaryProb, engineSettings.thrEntityProb,
                    engineSettings.thrClassificationProb);
            // System.out.println("** Application mode:");
            for (int i = 0; i < numDocs; ++i) {
                HashSet chunks = new HashSet();
                postPr.postProcessingChunk((short) 3, labelsFVDoc[i].multiLabels,
                        numClasses, chunks, chunkLenHash);
                // System.out.println("**
                // documentName="+((Document)corpus.get(i)).getName());
                addAnnsInDoc(doc, chunks, instanceType, featName, labelName,
                        labelsAndId);
            }
        } else {
            String featName = engineSettings.datasetDefinition.arrs.classFeature;
            String instanceType = engineSettings.datasetDefinition.getInstanceType();
            labelsAndId = new Label2Id();
            labelsAndId.loadLabelAndIdFromFile(wdResults,
                    ConstantParameters.FILENAMEOFLabelList);
            // post-processing and add new annotation to the text
            // PostProcessing postPr = new PostProcessing(0.42, 0.2);
            PostProcessing postPr = new PostProcessing(
                    engineSettings.thrBoundaryProb, engineSettings.thrEntityProb,
                    engineSettings.thrClassificationProb);
            for (int i = 0; i < numDocs; ++i) { // numDocs is always 1
                int[] selectedLabels = new int[labelsFVDoc[i].multiLabels.length];
                float[] valuesLabels = new float[labelsFVDoc[i].multiLabels.length];
                postPr.postProcessingClassification((short) 3,
                        labelsFVDoc[i].multiLabels, selectedLabels, valuesLabels);
                addAnnsInDocClassification(doc, selectedLabels, valuesLabels,
                        instanceType, featName, labelName, labelsAndId, engineSettings);
            }
        }
    }

    /**
     * Add the annotations to the document for chunk learning.
     *
     * @throws InvalidOffsetException
     */
    private void addAnnsInDoc(Document doc, HashSet chunks, String instanceType,
            String featName, String labelName, Label2Id labelsAndId)
            throws InvalidOffsetException {
        AnnotationSet annsDoc = null;
        if (inputASName == null || inputASName.trim().length() == 0) {
            annsDoc = doc.getAnnotations();
        } else {
            annsDoc = doc.getAnnotations(inputASName);
        }
        AnnotationSet annsDocResults = null;
        if (outputASName == null || outputASName.trim().length() == 0) {
            annsDocResults = doc.getAnnotations();
        } else {
            annsDocResults = doc.getAnnotations(outputASName);
        }
        AnnotationSet anns = annsDoc.get(instanceType);
        ArrayList annotationArray = (anns == null || anns.isEmpty())
                ? new ArrayList()
                : new ArrayList(anns);
        Collections.sort(annotationArray, new OffsetComparator());
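        // Each recognised chunk stores its start and end as indices into this
        // offset-sorted list of instance annotations; the resulting annotation
        // spans from the start node of the first instance to the end node of
        // the last one.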
        for (Object obj : chunks) {
            ChunkOrEntity entity = (ChunkOrEntity) obj;
            FeatureMap features = Factory.newFeatureMap();
            features.put(featName, labelsAndId.id2Label.get(
                    new Integer(entity.name).toString()).toString());
            features.put("prob", entity.prob);
            Annotation token1 = (Annotation) annotationArray.get(entity.start);
            Annotation token2 = (Annotation) annotationArray.get(entity.end);
            Node entityS = token1.getStartNode();
            Node entityE = token2.getEndNode();
            if (entityS != null && entityE != null) {
                annsDocResults.add(entityS.getOffset(), entityE.getOffset(), labelName,
                        features);
            }
        }
    }

    /**
     * Add the annotations to the document for classification.
     *
     * @throws InvalidOffsetException
     */
    private void addAnnsInDocClassification(Document doc, int[] selectedLabels,
            float[] valuesLabels, String instanceType, String featName,
            String labelName, Label2Id labelsAndId,
            LearningEngineSettings engineSettings) throws InvalidOffsetException {
        AnnotationSet annsDoc = null;
        if (inputASName == null || inputASName.trim().length() == 0) {
            annsDoc = doc.getAnnotations();
        } else {
            annsDoc = doc.getAnnotations(inputASName);
        }
        AnnotationSet annsDocResults = null;
        if (outputASName == null || outputASName.trim().length() == 0) {
            annsDocResults = doc.getAnnotations();
        } else {
            annsDocResults = doc.getAnnotations(outputASName);
        }
        AnnotationSet annsLabel = annsDoc.get(labelName);
        AnnotationSet anns = annsDoc.get(instanceType);
        ArrayList annotationArray = (anns == null || anns.isEmpty())
                ? new ArrayList()
                : new ArrayList(anns);
        Collections.sort(annotationArray, new OffsetComparator());
        // For the relation extraction
        String arg1F = null;
        String arg2F = null;
        if (engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
            AttributeRelation relAtt = (AttributeRelation) engineSettings.datasetDefinition.classAttribute;
            arg1F = relAtt.getArg1();
            arg2F = relAtt.getArg2();
        }
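        // A negative entry in selectedLabels means no label was assigned to
        // that instance, so it is skipped; label ids are 1-based, hence the
        // "+ 1" when looking the label up in id2Label.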
        for (int i = 0; i < annotationArray.size(); ++i) {
            if (selectedLabels[i] < 0) {
                continue;
            }
            FeatureMap features = Factory.newFeatureMap();
            features.put(featName, labelsAndId.id2Label.get(
                    new Integer(selectedLabels[i] + 1).toString()).toString());
            features.put("prob", valuesLabels[i]);
            Annotation ann = (Annotation) annotationArray.get(i);
            // For relation data, need the argument features
            if (engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
                String arg1V = ann.getFeatures().get(
                        engineSettings.datasetDefinition.arg1Feat).toString();
                String arg2V = ann.getFeatures().get(
                        engineSettings.datasetDefinition.arg2Feat).toString();
                features.put(arg1F, arg1V);
                features.put(arg2F, arg2V);
            }
            // FeatureMap featO = ann.getFeatures();
            // for(Object obj:features.keySet()) {
            // if(featO.containsKey(obj))
            // featO.put(obj.toString()+"_results", features.get(obj));
            // else featO.put(obj, features.get(obj));
            // }
            annsDocResults.add(ann.getStartNode().getOffset(), ann.getEndNode().getOffset(), labelName, features);
        }
    }

    /**
     * Add a ranked list of labels (not just a single label), together with
     * their scores, for each example in the document, for classification.
     *
     * @throws InvalidOffsetException
     */
    private void addLabelListInDocClassification(Document doc,
            LabelsOfFV[] multiLabels, String instanceType, String featName,
            String labelName, Label2Id labelsAndId,
            LearningEngineSettings engineSettings) throws InvalidOffsetException {
        AnnotationSet annsDoc = null;
        if (inputASName == null || inputASName.trim().length() == 0) {
            annsDoc = doc.getAnnotations();
        } else {
            annsDoc = doc.getAnnotations(inputASName);
        }
        AnnotationSet annsDocResults = null;
        if (outputASName == null || outputASName.trim().length() == 0) {
            annsDocResults = doc.getAnnotations();
        } else {
            annsDocResults = doc.getAnnotations(outputASName);
        }
        AnnotationSet anns = annsDoc.get(instanceType);
        ArrayList annotationArray = (anns == null || anns.isEmpty())
                ? new ArrayList()
                : new ArrayList(anns);
        Collections.sort(annotationArray, new OffsetComparator());
        // For the relation extraction
        String arg1F = null;
        String arg2F = null;
        if (engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
            AttributeRelation relAtt = (AttributeRelation) engineSettings.datasetDefinition.classAttribute;
            arg1F = relAtt.getArg1();
            arg2F = relAtt.getArg2();
        }
        for (int i = 0; i < annotationArray.size(); ++i) {
            int len = multiLabels[i].num;
            int[] indexSort = new int[len];
            sortFloatAscIndex(multiLabels[i].probs, indexSort, len, len);
            // get the labels and their scores
            StringBuffer strB = new StringBuffer();
            for (int j = 0; j < len; ++j) {
                String label = labelsAndId.id2Label.get(
                        new Integer(indexSort[j] + 1).toString()).toString();
                strB.append(label + ":" + multiLabels[i].probs[indexSort[j]] + " ");
            }
            FeatureMap features = Factory.newFeatureMap();
            features.put(featName, strB.toString().trim());
            // features.put("prob", valuesLabels[i]);
            Annotation ann = (Annotation) annotationArray.get(i);
            // For relation data, need the argument features
            if (engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
                String arg1V = ann.getFeatures().get(
                        engineSettings.datasetDefinition.arg1Feat).toString();
                String arg2V = ann.getFeatures().get(
                        engineSettings.datasetDefinition.arg2Feat).toString();
                features.put(arg1F, arg1V);
                features.put(arg2F, arg2V);
            }
            FeatureMap featO = ann.getFeatures();
            for (Object obj : features.keySet()) {
                // if(featO.containsKey(obj))
                featO.put(obj.toString() + "_resultsList", features.get(obj));
                // else featO.put(obj, features.get(obj));
            }
            // FeatureMap featAdd = ann.
            // for(Object obj:featO.keySet()) {
            // annsDocResults.add(ann.).getFeatures().put(obj, featO.get(obj));
            // }
            annsDocResults.add(ann.getStartNode().getOffset(), ann.getEndNode().getOffset(), labelName, featO);
            // annsDoc.add(ann.getStartNode(), ann.getEndNode(), labelName, features);
        }
    }

    /** Convert the string labels in the NLP data file into index labels. */
    public void convertNLPLabelsTDataLabel(File nlpDataFile, File dataFile,
            File labelInDataFile, File nlpDataLabelFile, int numDocs,
            boolean surroundingMode) {
        try {
            BufferedReader inData = new BomStrippingInputStreamReader(
                    new FileInputStream(dataFile), "UTF-8");
            BufferedReader inNlpData = new BomStrippingInputStreamReader(
                    new FileInputStream(nlpDataFile), "UTF-8");
            BufferedWriter outNlpDataLabel = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(nlpDataLabelFile), "UTF-8"));
            HashSet uniqueLabels = new HashSet();
            // The head line of NLP feature file
            String line = inNlpData.readLine();
            outNlpDataLabel.append(line);
            outNlpDataLabel.newLine();
            String[] items;
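            // The loop below walks the feature-vector data file and the NLP
            // feature file in parallel: for each document it merges the two
            // per-document header lines, and for each instance it writes the
            // labels taken from the FV data file followed by the NLP features
            // taken from the NLP feature file.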
            // For each document
            for (int iDoc = 0; iDoc < numDocs; ++iDoc) {
                int numLabels;
                line = inData.readLine();
                items = line.split(ConstantParameters.ITEMSEPARATOR);
                int num = Integer.parseInt(items[1]);
                line = inNlpData.readLine();
                items = line.split(ConstantParameters.ITEMSEPARATOR);
                line = items[0] + ConstantParameters.ITEMSEPARATOR + items[1]
                        + ConstantParameters.ITEMSEPARATOR + num;
                outNlpDataLabel.append(line);
                outNlpDataLabel.newLine();
                // For each instance
                for (int i = 0; i < num; ++i) {
                    // Read the line from data file and get the data label
                    line = inData.readLine();
                    items = line.split(ConstantParameters.ITEMSEPARATOR);
                    numLabels = Integer.parseInt(items[1]);
                    StringBuffer labels = new StringBuffer();
                    labels.append(items[1]);

                    //System.out.println("line="+line+", items[1]="+items[1]);

                    for (int j = 0; j < numLabels; ++j) {
                        labels.append(ConstantParameters.ITEMSEPARATOR);
                        labels.append(items[j + 2]);
                        if (!uniqueLabels.contains(items[j + 2])) {
                            uniqueLabels.add(items[j + 2]);
                        }
                    }
                    outNlpDataLabel.append(labels.toString());
                    // Read the line from NLP feature and get the features
                    line = inNlpData.readLine();
                    items = line.split(ConstantParameters.ITEMSEPARATOR);
                    numLabels = Integer.parseInt(items[0]);
                    StringBuffer nlpFeats = new StringBuffer();
                    for (int j = numLabels + 1; j < items.length; ++j) {
                        nlpFeats.append(ConstantParameters.ITEMSEPARATOR);
                        nlpFeats.append(items[j]);
                    }
                    outNlpDataLabel.append(nlpFeats);
                    outNlpDataLabel.newLine();
                }
            }
            outNlpDataLabel.flush();
            outNlpDataLabel.close();
            inData.close();
            inNlpData.close();
            BufferedWriter labelInData = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(labelInDataFile), "UTF-8"));
            labelInData.append(uniqueLabels.size() + " #total_labels");
            labelInData.newLine();
            for (Object obj : uniqueLabels) {
                labelInData.append(obj.toString());
                labelInData.newLine();
            }
            labelInData.flush();
            labelInData.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Filter out some of the negative examples from the training data using an SVM.
     *
     * @throws GateException
     */
    public void FilteringNegativeInstsInJava(int numDocs,
            LearningEngineSettings engineSettings) throws GateException {
        LogService.logMessage("\nFiltering starts.", 1);
        // The files for training data and model
        File wdResults = new File(wd, ConstantParameters.SUBDIRFORRESULTS);
        String fvFileName = wdResults.toString() + File.separator
                + ConstantParameters.FILENAMEOFFeatureVectorData;
        String modelFileName = wdResults.toString() + File.separator
                + ConstantParameters.FILENAME_OF_MODEL;
        File dataFile = new File(fvFileName);
        File modelFile = new File(modelFileName);
        // for learner of multi to binary conversion
        LogService.logMessage("Multi to binary conversion.", 1);
        MultiClassLearning chunkLearning = new MultiClassLearning(
                engineSettings.multi2BinaryMode);
        // read data
        File tempDataFile = new File(wdResults,
                ConstantParameters.TempFILENAMEofFVData);
        boolean isUsingTempDataFile = false;
        //if(paumLearner.getLearnerName().equals("SVMExec"))
        //isUsingTempDataFile = true; //using the temp data file
        chunkLearning.getDataFromFile(numDocs, dataFile, isUsingTempDataFile, tempDataFile);
        // Back up the label data before it is reset.
        LabelsOfFeatureVectorDoc[] labelsFVDocB = new LabelsOfFeatureVectorDoc[numDocs];
        for (int i = 0; i < numDocs; ++i) {
            labelsFVDocB[i] = new LabelsOfFeatureVectorDoc();
            labelsFVDocB[i].multiLabels = new LabelsOfFV[chunkLearning.dataFVinDoc.labelsFVDoc[i].multiLabels.length];
            for (int j = 0; j < chunkLearning.dataFVinDoc.labelsFVDoc[i].multiLabels.length; ++j) {
                labelsFVDocB[i].multiLabels[j] = chunkLearning.dataFVinDoc.labelsFVDoc[i].multiLabels[j];
            }
        }
        // Reset the class label of data for binary class for filtering
        // purpose
        int numNeg; // number of negative examples in the training data
        numNeg = chunkLearning.resetClassInData();
        if (numNeg == 0) { // no negative examples (null label) at all in the
            // training data
            LogService.logMessage(
                    "!Cannot do the filtering, because there are no negative examples "
                    + "in the training data for filtering!", 1);
            if (LogService.maxVerbosityLevel > 0) {
                System.out.println("!Cannot do the filtering, because there are no negative examples "
                        + "in the training data for filtering!");
            }
            return;
        }
        LogService.logMessage("The number of classes in dataset: "
                + chunkLearning.numClasses, 1);
        // Use the SVM only for filtering
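        // (The filtering step always builds an SVMLibSvmJava learner with a
        // linear kernel ("-t 0"), independently of the learner configured for
        // the main learning task; the executable and learner parameters below
        // are still taken from the engine settings.)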
        String dataSetFile = null;
        SupervisedLearner paumLearner = MultiClassLearning.obtainLearnerFromName(
                "SVMLibSvmJava", "-c 1.0 -t 0 -m 100 -tau 1.0 ", dataSetFile);
        paumLearner.setLearnerExecutable(engineSettings.learnerSettings.executableTraining);
        paumLearner.setLearnerParams(engineSettings.learnerSettings.options);
        LogService.logMessage("The learners: " + paumLearner.getLearnerName(), 1);
        // training
        // if the number of classes is zero, do no filtering at all
        if (chunkLearning.numClasses == 0) {
            LogService.logMessage(
                    "!Cannot do the filtering, because there are no positive examples "
                    + "in the training data!", 1);
            if (LogService.maxVerbosityLevel > 0) {
                System.out.println("!Cannot do the filtering, because there are no positive examples "
                        + "in the training data!");
            }
            return;
        }
        // use different methods for single-thread or multi-thread execution
        if (engineSettings.numThreadUsed > 1) {//using thread
            chunkLearning.training(paumLearner, modelFile);
            // apply the learned model to the training examples and get the
            // confidence score for each example
            chunkLearning.apply(paumLearner, modelFile);
        } else { //not using thread
            chunkLearning.trainingNoThread(paumLearner, modelFile, isUsingTempDataFile, tempDataFile);
            chunkLearning.applyNoThread(paumLearner, modelFile);
        }

        // Store the scores of negative examples.
        float[] scoresNegB = new float[numNeg];
        float[] scoresNeg = new float[numNeg];
        int kk = 0;
        for (int i = 0; i < labelsFVDocB.length; ++i) {
            for (int j = 0; j < labelsFVDocB[i].multiLabels.length; ++j) {
                if (labelsFVDocB[i].multiLabels[j].num == 0) {
                    // System.out.println("(i, j, kk)="+i+","+j+","+kk+"*");
                    scoresNeg[kk++] = chunkLearning.dataFVinDoc.labelsFVDoc[i].multiLabels[j].probs[0];
                }
            }
        }
        // If we want to remove the negatives that are close to the positive
        // ones, reverse the scores.
        if (engineSettings.filteringNear) {
            for (int i = 0; i < numNeg; ++i) {
                scoresNeg[i] = -scoresNeg[i];
            }
        }
        // Back up the score before sorting
        for (int i = 0; i < numNeg; ++i) {
            scoresNegB[i] = scoresNeg[i];
        }
        // Sort those scores
        Arrays.sort(scoresNeg);
        // int index = numNeg -
        // (int)Math.floor(numNeg*engineSettings.filteringRatio);
        int index = (int) Math.floor(numNeg * engineSettings.filteringRatio);
        if (index >= numNeg) {
            index = numNeg - 1;
        }
        if (index < 0) {
            index = 0;
        }
        float thrFiltering = scoresNeg[index];
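        // Illustrative example (hypothetical numbers): with numNeg = 10 and
        // filteringRatio = 0.3, index = floor(10 * 0.3) = 3, so thrFiltering is
        // the 4th-smallest score and roughly the lowest-scoring 30% of negative
        // examples (those strictly below the threshold) are filtered out.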
        boolean[] isFiltered = new boolean[numNeg];
        for (int i = 0; i < numNeg; ++i) {
            if (scoresNegB[i] < thrFiltering) {
                isFiltered[i] = true;
            } else {
                isFiltered[i] = false;
            }
        }
        // Write the filtered data into the data file
        BufferedWriter out;
        try {
            out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
                    dataFile), "UTF-8"));
            numNeg = 0;
            for (int i = 0; i < labelsFVDocB.length; ++i) {
                int kk1 = 0;
                int numK = 0; // num of instances in the doc to be kept
                for (int j = 0; j < labelsFVDocB[i].multiLabels.length; ++j) {
                    if (labelsFVDocB[i].multiLabels[j].num == 0) {
                        if (!isFiltered[numNeg + kk1]) {
                            ++numK;
                        }
                        ++kk1;
                    } else {
                        ++numK;
                    }
                }
                out.write(i + ConstantParameters.ITEMSEPARATOR + numK
                        + ConstantParameters.ITEMSEPARATOR
                        + chunkLearning.dataFVinDoc.trainingFVinDoc[i].docId);
                out.newLine();
                kk1 = 0;
                for (int j = 0; j < labelsFVDocB[i].multiLabels.length; ++j) {
                    if (labelsFVDocB[i].multiLabels[j].num > 0
                            || !isFiltered[numNeg + kk1]) {
                        StringBuffer line = new StringBuffer();
                        line.append(j + ConstantParameters.ITEMSEPARATOR
                                + labelsFVDocB[i].multiLabels[j].num);
                        for (int j1 = 0; j1 < labelsFVDocB[i].multiLabels[j].num; ++j1) {
                            line.append(ConstantParameters.ITEMSEPARATOR
                                    + labelsFVDocB[i].multiLabels[j].labels[j1]);
                        }
                        SparseFeatureVector fv = chunkLearning.dataFVinDoc.trainingFVinDoc[i].fvs[j];
                        for (int j1 = 0; j1 < fv.len; ++j1) {
                            line.append(ConstantParameters.ITEMSEPARATOR + fv.nodes[j1].index
                                    + ConstantParameters.INDEXVALUESEPARATOR + fv.nodes[j1].value);
                        }
                        out.write(line.toString());
                        out.newLine();
                    }
                    if (labelsFVDocB[i].multiLabels[j].num == 0) {
                        ++kk1;
                    }
                }
                numNeg += kk1;
            }
            out.flush();
            out.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Determine the type of the learner: returns 1 for a Weka-based learner and
     * 2 for a learner using the multi-to-binary conversion.
     */
    public static int obtainLearnerType(String learnerName) throws GateException {
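        // For example: "C4.5Weka", "KNNWeka" or "NaiveBayesWeka" return 1 (a
        // Weka learner); "SVMLibSvmJava", "PAUM", "SVMExec" and "PAUMExec"
        // return 2 (multi-to-binary conversion).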
        if (learnerName.equals("SVMLibSvmJava") || learnerName.equals("C4.5Weka")
                || learnerName.equals("KNNWeka") || learnerName.equals("NaiveBayesWeka")
                || learnerName.equals("PAUM") || learnerName.equals("SVMExec")
                || learnerName.equals("PAUMExec")) {
            if (learnerName.endsWith("Weka")) {
                return 1;
            } else {
                return 2;
            }
        } else {
            throw new GateException("The learning engine named as \"" + learnerName
                    + "\" is not defined!");
        }
    }

    /** Convert and view the SVM models in terms of NLP features. */
    public void viewSVMmodelsInNLPFeatures(File modelFile,
            LearningEngineSettings engineSettings) {
        try {
            // First decide whether the learning model is a linear one or not
            if (!(engineSettings.learnerSettings.implementationName.equalsIgnoreCase("SVMLibSvmJava")
                    && (!engineSettings.learnerSettings.options.contains("-t ")
                            || engineSettings.learnerSettings.options.contains("-t 0")))) {
                System.out.println("According to the configuration file, "
                        + "the model file is not a linear SVM model, so it cannot be displayed by the "
                        + "current implementation!");
                return;
            }
            int numP0, numN0;
            boolean surroundMode;
            numP0 = engineSettings.numPosSVMModel;
            numN0 = engineSettings.numNegSVMModel;
            surroundMode = engineSettings.surround;
            // Build a map from feature id to feature string
            HashMap featId2Form = new HashMap();
            for (Object obj : featuresList.featuresList.keySet()) {
                int k = Integer.parseInt(featuresList.featuresList.get(obj).toString());
                featId2Form.put(k, obj);
            }
            // Need some methods from MultiClassLearning
            MultiClassLearning mulL = new MultiClassLearning();
            // Open the model file and read the model
            if (modelFile.exists() && !modelFile.isDirectory()) {
                // see whether we're trying to apply an old-style model
                // stored all in one file
                BufferedReader buff = new BomStrippingInputStreamReader(
                        new FileInputStream(modelFile), "UTF-8");
                String firstLine = buff.readLine();
                buff.close();
                if (firstLine != null && firstLine.endsWith("#numTrainingDocs")) {
                    // this is an old-style model, so try and transparently upgrade it to
                    // the new format
                    mulL.upgradeSingleFileModelToDirectory(modelFile);
                } else {
                    throw new IOException("Unrecognised model file format for file "
                            + modelFile);
                }
            }
            if (!modelFile.exists()) {
                throw new IllegalStateException(
                        "Model directory " + modelFile + " does not exist");
            }
            // Read the training meta information from the meta data file,
            // including the total number of features and the number of tags (numClasses)
            File metaDataFile = new File(modelFile, ConstantParameters.FILENAMEOFModelMetaData);
            BufferedReader metaDataBuff = new BufferedReader(new InputStreamReader(
                    new FileInputStream(metaDataFile), "UTF-8"));
            int totalNumFeatures;
            String learnerNameFromModel = null;
            // note that reading the training meta data also reads the number of
            // classes in the model, i.e. it updates numClasses.
            totalNumFeatures = mulL.ReadTrainingMetaData(metaDataBuff,
                    learnerNameFromModel);
            metaDataBuff.close();
            // for each class
            int classIndex = 1;
            for (int iClass = 0; iClass < mulL.numClasses; ++iClass) {
                float b;
                float[] w = new float[totalNumFeatures];
                //Read the model file
                final File thisClassModelFile = new File(modelFile, String.format(
                        ConstantParameters.FILENAMEOFPerClassModel, Integer.valueOf(classIndex++)));
                BufferedReader modelBuff = new BomStrippingInputStreamReader(
                        new FileInputStream(thisClassModelFile), "UTF-8");
                //Read the header line
                String items[] = modelBuff.readLine().split(" ");
                // Get the weight vector
                b = SvmLibSVM.readWeightVectorFromFile(modelBuff, w);
                modelBuff.close();
                int numT;
                int numP;
                if (numP0 > 0) {
                    numT = 0;
                    for (int i = 1; i < totalNumFeatures; ++i) {
                        if (w[i] >= 0) {
                            ++numT;
                        }
                    }
                    float[] wP = new float[numT];
                    int[] indexW = new int[numT];
                    numT = 0;
                    for (int i = 1; i < totalNumFeatures; ++i) {
                        if (w[i] >= 0) {
                            wP[numT] = w[i];
                            indexW[numT] = i;
                            ++numT;
                        }
                    }
                    if (numP0 > numT) {
                        numP = numT;
                    } else {
                        numP = numP0;
                    }
                    int[] indexSort = new int[numP];
                    sortFloatAscIndex(wP, indexSort, numT, numP);
                    String st1 = null;
                    if (surroundMode) {
                        st1 = labelsAndId.id2Label.get(
                                new Integer(iClass / 2 + 1).toString()).toString();
                        if (iClass % 2 == 0) {
                            st1 += "-StartToken";
                        } else {
                            st1 += "-LastToken";
                        }
                    } else {
                        st1 = labelsAndId.id2Label.get(new Integer(iClass + 1).toString()).toString();
                    }
                    System.out.println("The " + numP
                            + " most significiant positive NLP feature for class:   " + st1
                            + ":");
                    for (int i = 0; i < numP; ++i) {
                        int k1 = indexW[indexSort[i]]
                                / (int) ConstantParameters.MAXIMUMFEATURES;
                        int k2 = indexW[indexSort[i]]
                                % (int) ConstantParameters.MAXIMUMFEATURES;
                        String st = null;
                        if (k1 == 0) {
                            st = "0";
                        } else {
                            if (k1 <= maxNegPositionTotal) {
                                st = "-" + k1;
                            } else {
                                k1 -= maxNegPositionTotal;
                                st = "+" + k1;
                            }
                        }
                        st += " " + featId2Form.get(k2).toString();
                        System.out.println(i + ": " + st + " -- " + wP[indexSort[i]]);
                    }
                }
                // For the negative weights
                if (numN0 > 0) {
                    System.out.println("Negative weights:");
                    numT = 0;
                    for (int i = 0; i < totalNumFeatures; ++i) {
                        if (w[i] < 0) {
                            ++numT;
                        }
                    }
                    float[] wN = new float[numT];
                    int[] indexWN = new int[numT];
                    numT = 0;
                    for (int i = 0; i < totalNumFeatures; ++i) {
                        if (w[i] < 0) {
                            wN[numT] = -w[i];
                            indexWN[numT] = i;
                            ++numT;
                        }
                    }
                    if (numN0 > numT) {
                        numP = numT;
                    } else {
                        numP = numN0;
                    }
                    int[] indexSortN = new int[numP];
                    sortFloatAscIndex(wN, indexSortN, numT, numP);
                    String st1 = null;
                    if (surroundMode) {
                        st1 = labelsAndId.id2Label.get(
                                new Integer(iClass / 2 + 1).toString()).toString();
                        if (iClass % 2 == 0) {
                            st1 += "-StartToken";
                        } else {
                            st1 += "-LastToken";
                        }
                    } else {
                        st1 = labelsAndId.id2Label.get(new Integer(iClass + 1).toString()).toString();
                    }
                    System.out.println("The " + numP
                            + " most significiant negative NLP feature for class:   " + st1
                            + ":");
                    for (int i = 0; i < numP; ++i) {
                        int k1 = indexWN[indexSortN[i]]
                                / (int) ConstantParameters.MAXIMUMFEATURES;
                        int k2 = indexWN[indexSortN[i]]
                                % (int) ConstantParameters.MAXIMUMFEATURES;
                        String st = null;
                        if (k1 == 0) {
                            st = "0";
                        } else {
                            if (k1 <= maxNegPositionTotal) {
                                st = "-" + k1;
                            } else {
                                k1 -= maxNegPositionTotal;
                                st = "+" + k1;
                            }
                        }
                        st += " " + featId2Form.get(k2).toString();
                        float wN0 = -wN[indexSortN[i]];
                        System.out.println(i + ": " + st + " -- " + wN0);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Get the indices of the numK biggest components of an array. wP is the
     * array holding the numbers to be sorted; indexSort receives the indices of
     * the numK biggest numbers, in descending order of value; numT is the number
     * of valid entries in wP.
     */
    public static void sortFloatAscIndex(float[] wP, int[] indexSort, int numT,
            int numK) {
        int i, j, k, j1;
        float[] rp1 = new float[numK];
        float one;
        if (numT <= 0) {
            return;
        }
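        // A partial selection sort: rp1 keeps the current top-numK values in
        // descending order and indexSort keeps their original indices. As an
        // illustrative example, wP = {0.2f, 0.9f, 0.5f} with numT = 3 and
        // numK = 2 yields indexSort = {1, 2}, the indices of the two largest
        // values in descending order.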
        rp1[0] = wP[0];
        indexSort[0] = 0;
        for (i = 1; i < numT; ++i) {
            one = wP[i];
            if (i >= numK) {
                j1 = numK - 1;
            } else {
                j1 = i - 1;
            }
            k = j1;
            for (j = j1; j > -1; --j) {
                if (one > rp1[j]) {
                    --k;
                } else {
                    break;
                }
            }
            if (i < numK) {
                rp1[i] = rp1[i - 1];
                indexSort[i] = indexSort[i - 1];
            }
            ++k;
            for (j = j1; j > k; --j) {
                rp1[j] = rp1[j - 1];
                indexSort[j] = indexSort[j - 1];
            }
            if (k < numK) {
                rp1[k] = one;
                indexSort[k] = i;
            }
        }
    }
    // /////// Benchmarkable ////////////////
    private String parentBenchmarkID;
    private String benchmarkID;

    /**
     * Returns the benchmark ID of the parent of this resource.
     *
     * @return the parent's benchmark ID
     */
    public String getParentBenchmarkId() {
        return this.parentBenchmarkID;
    }

    /**
     * Returns the benchmark ID of this resource.
     *
     * @return the benchmark ID of this resource
     */
    public String getBenchmarkId() {
        if (this.benchmarkID == null) {
            benchmarkID = "LightWeightLearningApi";
        }
        return this.benchmarkID;
    }

    /**
     * Given an ID of the parent resource, this method is responsible for
     * producing the Benchmark ID, unique to this resource.
     *
     * @param parentID
     */
    public void createBenchmarkId(String parentID) {
        parentBenchmarkID = parentID;
        benchmarkID = Benchmark.createBenchmarkId("LightWeightLearningApi",
                parentID);
    }

    /**
     * This method sets the parent benchmark ID for this resource.
     *
     * @param benchmarkID the benchmark ID of the parent resource
     */
    public void setParentBenchmarkId(String benchmarkID) {
        parentBenchmarkID = benchmarkID;
    }

    /**
     * Returns the logger object being used by this resource.
     *
     * @return the logger used by this resource
     */
    public Logger getLogger() {
        return Benchmark.logger;
    }

    public void setBenchmarkId(String arg0) {
        // stub: intentionally a no-op
    }
}
